#!/usr/bin/python
# -*- coding: latin1 -*-
###########################
### Autor: Sebastian Enger / M.Sc.
### Copyright: Sebastian Enger
### Licence: Commercial / OneTipp
### Version: 1.0.4  - 12-10-2015@22:53 Uhr
### Contact: sebastian.enger@gmail.com
### OneTipp Text Tool in Python
###########################
######## export PYTHON_EGG_CACHE=/tmp
import os
import pprint
import nltk
# import rocksdb                                         # shared library kann aktuell noch nicht gelesen werden
import MySQLdb  # apt-get install python-mysqldb
from sphinxit.core.processor import Search  # http://sphinxit.readthedocs.org/en/latest/
from sphinxit.core.helpers import BaseSearchConfig
import random
import codecs
import sys
import unicodedata
# Use /tmp as the Python egg cache directory (in-process equivalent of
# `export PYTHON_EGG_CACHE=/tmp`).
# NOTE(review): this runs after the imports above, so it only affects egg
# extraction that happens later -- confirm it is still needed here.
os.environ['PYTHON_EGG_CACHE'] = '/tmp'
# NOTE(review): sent_tokenize is not referenced by the visible script code.
from nltk.tokenize import sent_tokenize
# One-time setup: the NLTK data packages must be installed, e.g.
#   python -m nltk.downloader -d /usr/share/nltk_data all
#   python -m nltk.downloader all
#   or nltk.download() from an interactive interpreter
# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module is reloaded to get it back. Afterwards implicit str <-> unicode
# conversions use Latin-1 instead of ASCII for the whole process.
reload(sys)
sys.setdefaultencoding('latin-1')
class SphinxitConfig(BaseSearchConfig):
    """Sphinxit search settings used for every ``Search`` built in this script.

    Overrides the ``BaseSearchConfig`` attributes listed below; everything
    else is inherited unchanged.
    """

    # Where the searchd daemon is reached (local host, port 9977).
    SEARCHD_CONNECTION = dict(host='127.0.0.1', port=9977)
    POOL_SIZE = 5

    # Diagnostic switches, all turned off.
    DEBUG = False
    WITH_META = False
    WITH_STATUS = False
    # SQL_ENGINE = 'oursql'
# Pretty-printer for ad-hoc debugging dumps.
pp = pprint.PrettyPrinter(indent=4)

# Background reading:
#   http://pyrocksdb.readthedocs.org/en/v0.4/tutorial/index.html
#   https://github.com/sphinxsearch/sphinx/blob/master/api/sphinxapi.py
#   http://www.tutorialspoint.com/python/python_database_access.htm

# NOTE(review): credentials are hard-coded in both connections below --
# consider moving them to configuration.

# SphinxQL endpoint, reached through the MySQL client library on port 9977.
sphinx = MySQLdb.connect(host='127.0.0.1', port=9977, user='root',
                         passwd='###########99', db='onetipp')
cursorSphinx = sphinx.cursor()

# Regular MySQL server on the default port 3306.
mysql = MySQLdb.connect(host='127.0.0.1', port=3306, user='root',
                        passwd='###########99', db='onetipp')
cursorMysql = mysql.cursor()
def deumlaut(s):
    """Return ``s`` with German umlauts and sharp s spelled out in ASCII.

    The characters are matched by their Latin-1 code points and replaced
    in the order listed below; every other character is left untouched.
    """
    transliterations = (
        ('\xdf', 'ss'),
        ('\xfc', 'ue'),
        ('\xdc', 'Ue'),
        ('\xf6', 'oe'),
        ('\xd6', 'Oe'),
        ('\xe4', 'ae'),
        ('\xc4', 'Ae'),
    )
    for special_char, ascii_form in transliterations:
        s = s.replace(special_char, ascii_form)
    return s
# Tokens shorter than this are never looked up or replaced.
MIN_WORD_LENGTH = 5
# Synonym candidates of this length or longer are skipped.
MAX_SYNONYM_LENGTH = 25


def same_initial_case(first, second):
    """Return True if both strings start upper-case or both start lower-case.

    Empty strings and strings whose first character has no case never match.
    """
    head_a = first[0:1]
    head_b = second[0:1]
    return ((head_a.isupper() and head_b.isupper())
            or (head_a.islower() and head_b.islower()))


def pick_synonym(word, candidates, max_length=MAX_SYNONYM_LENGTH):
    """Return the first candidate that may replace ``word``, or None.

    A candidate qualifies when it is shorter than ``max_length`` and its
    first character has the same case as the first character of ``word``.
    The candidate is returned unmodified; the caller transliterates it.
    """
    for candidate in candidates:
        if len(candidate) < max_length and same_initial_case(candidate, word):
            return candidate
    return None


def is_known_name(cursor, word):
    """Return True if ``word`` matches an entry of ``namen_table``.

    The word is passed as a bound parameter so that quotes in the input text
    cannot alter the statement. LIKE wildcards contained in the word keep
    their meaning, as they did before.
    """
    cursor.execute(
        "SELECT * FROM (namen_table) WHERE name LIKE %s LIMIT 1;", (word,))
    return cursor.fetchone() is not None


def fetch_synonyms(cursor, word):
    """Return the list of stored synonyms for ``word`` (possibly empty).

    The Sphinx index ``onetipp_syn_simple`` is asked for the best match; the
    first value of the first hit is used as ``uid`` to read the
    semicolon-separated ``synonyms`` column from ``synonym_unique_simple``.
    """
    query = Search(indexes=['onetipp_syn_simple'], config=SphinxitConfig)
    query = query.match(word).options(
        ranker='proximity_bm25',
        max_matches=1,
        max_query_time=350,
        field_weights={'synonyms': 100},
    )
    result = query.ask()
    try:
        # list() keeps this working where dict.values() is not indexable.
        syn_id = list(result['result']['items'][0].values())[0]
    except IndexError:
        # No hit in the index: nothing to replace.
        return []
    if not syn_id > 0:
        return []
    cursor.execute(
        "SELECT synonyms FROM (synonym_unique_simple) WHERE uid= %s",
        (syn_id,))
    row = cursor.fetchone()
    if not row or not row[0]:
        return []
    return row[0].split(";")


def main(argv=None):
    """Rewrite a text file by swapping longer words for stored synonyms.

    ``argv[1]`` is the input file and ``argv[2]`` the output file. Every
    token of at least MIN_WORD_LENGTH characters that is not found in the
    name table is replaced by the first suitable synonym (transliterated
    with ``deumlaut``); all other tokens are kept. The tokens are written
    to the output file joined by single spaces.

    The text is handled as the raw byte string read from disk. The previous
    ``text.decode('latin-1')`` call discarded its result, so this is the
    behaviour the script always had.

    Unlike before, no blank line is printed to stdout when the index has no
    hit for a word.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.exit("usage: <script> INPUTFILE OUTPUTFILE")
    inputfile = argv[1]
    outputfile = argv[2]

    with open(inputfile, 'r') as source:
        text = source.read()
    tokens = nltk.word_tokenize(text)

    try:
        for index, word in enumerate(tokens):
            if len(word) < MIN_WORD_LENGTH:
                continue
            # Names are write-protected: never replace them.
            if is_known_name(cursorMysql, word):
                continue
            replacement = pick_synonym(word, fetch_synonyms(cursorMysql, word))
            if replacement is not None:
                tokens[index] = deumlaut(replacement)

        outputtext = ' '.join(tokens)
        with codecs.open(outputfile, 'w') as target:
            target.write(outputtext)
    finally:
        # Release both module-level connections even if processing failed.
        mysql.close()
        sphinx.close()


if __name__ == '__main__':
    main()
    sys.exit(0)